#IMPORTANT NOTE: everything for plotting in figures currently comes from the second_timepoint object, but there are additional chunks that run similar analyses on all_data objects with different cluster numbers
#Set working directory to appropriate folder for inputs and outputs on Google Drive
#Initialize
rm(list = ls())
library(dplyr)
library(Seurat)
library(ggplot2)
library(RColorBrewer)
library(xlsx)
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Preprocess_GEX/Objects_premerged.RData')
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Preprocess_GEX/second_timepoint_merged.RData')
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Preprocess_GEX/first_timepoint_merged.RData')
second_timepoint <- NormalizeData(second_timepoint)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
second_timepoint <- FindVariableFeatures(second_timepoint, selection.method = 'vst', nFeatures = 20000)
Warning: The following arguments are not used: nFeatures
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
second_timepoint <- ScaleData(second_timepoint)
Centering and scaling data matrix
|
| | 0%
|
|================================================================ | 50%
|
|===============================================================================================================================| 100%
second_timepoint <- RunPCA(second_timepoint)
PC_ 1
Positive: IGFBP7, COL1A1, BASP1, TPM1, CAV1, ARL4C, FN1, RTN4, MMP2, TXNRD1
C12orf75, CALD1, SPOCK1, THBS1, MYL6, ANXA1, COL6A1, TPM4, CALM2, IL6ST
COL6A2, MYOF, PRNP, C1R, TMSB4X, DSTN, CRIM1, ANXA2, SFRP1, CEMIP
Negative: PMEL, MLANA, LHFPL3-AS1, FRMD4B, DCT, S100B, CAPN3, PRDX1, BCAS3, APOD
RAMP1, VGF, RGS1, IRS2, C11orf96, RLBP1, CITED1, BAAT, AKR1A1, LINC01531
FABP7, SCML4, CEACAM1, ATP6V0D2, CSTB, EDNRB, HSPA1A, CPM, TRIM63, TRPM1
PC_ 2
Positive: MT2A, TMEM158, MMP1, STC1, CCND1, SCG2, IER3, PHLDA1, TFPI2, KYNU
VEGFA, SFRP1, LINC02376, WNT5A, SERPINB2, HPCAL1, AKR1B1, SIRPB1, GNG11, EREG
LTBP1, ITGA2, MYEOV, CD44, SLC16A6, MMP3, ANGPTL4, NRP1, TIMP3, MAP4K4
Negative: CRYAB, IFI6, S100B, AEBP1, SPARC, MFGE8, PMP22, SORBS2, C1orf198, LIMA1
PALLD, ANK3, CTSK, COL9A3, HSPG2, IFITM3, LIMCH1, GREM2, PLP1, SCRG1
GAS7, CEBPD, OLFML2A, ISG15, EPB41L3, AKAP12, IFI27, APOE, LMCD1, NUPR1
PC_ 3
Positive: MALAT1, FTH1, FTL, NEAT1, MMP1, FN1, NDRG1, SCG2, IGFBP5, RND3
TIMP1, CEBPD, GAS5, ADM, DEPP1, RGS2, IGFBP7, LINC00968, INHBA, JUN
STC1, ARL4C, UBC, IL6ST, TMSB4X, TFPI2, NNMT, PTGS2, FGF7, IER3
Negative: MKI67, CENPF, TPX2, ASPM, UBE2S, NUSAP1, GTSE1, ANLN, TOP2A, H2AFZ
BIRC5, PRC1, HIST1H1B, UBE2C, HIST1H4C, TMPO, TUBA1B, CEP55, CENPE, HMGB2
CCNB1, HMMR, TK1, NCAPG, PRR11, PCLAF, AURKB, UBE2T, DBF4, CDKN3
PC_ 4
Positive: OLFML2A, EPB41L3, HSPG2, COL9A3, NGFR, CRYAB, ITGA6, ITGA2, SCRG1, DLX5
SEMA3B, H19, TSPAN13, GAS7, NCAM1, TRIM58, NTRK2, SERPINE2, ANK3, NFATC2
TSC22D1, SLITRK6, MAP4K4, EVI5, PHACTR1, LRRTM4, AKAP12, GRIK2, HTRA1, MALAT1
Negative: BST2, IFI27, VCAM1, COL14A1, XAF1, APOE, SFRP4, LINC00968, C1R, LINC01914
GBP1, DEPP1, CTSK, CARD16, IFI44L, PDE5A, PLAAT4, GYPC, DCN, PSMB9
NUPR1, LBP, FOSB, TSLP, NNMT, CCL2, BGN, C1S, RPS27L, IFITM3
PC_ 5
Positive: VCAM1, IGFBP5, BOC, COL14A1, FOXF1, KYNU, PLAAT4, CTSC, MMD, CARD16
TRPA1, C1R, LBP, PDGFRA, IRF1, PMEPA1, PCDH18, WNT5A, FENDRR, CASP1
DCN, CSRP2, LAMB1, SOST, AGT, TBX2, MGST1, SLC7A11, FOXF2, FAM20C
Negative: CPA4, KRT34, SERPINE1, SMYD3, LINC01638, AC092807.3, DKK1, C12orf75, TNIK, FGF5
OXTR, SRGN, SPOCK1, CDA, CCN5, GRAMD2B, ARL4C, EPGN, SPOCD1, SCG5
MME, CRISPLD2, TPST2, STON2, TNFRSF12A, CSTB, RPS27L, GCNT1, LMO7, RASD2
second_timepoint <- FindNeighbors(second_timepoint, dims = 1:15)
Computing nearest neighbor graph
Computing SNN
second_timepoint <- FindClusters(second_timepoint, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 57996
Number of edges: 1751932
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.9022
Number of communities: 14
Elapsed time: 13 seconds
second_timepoint <- RunUMAP(second_timepoint, dims = 1:15)
09:22:25 UMAP embedding parameters a = 0.9922 b = 1.112
09:22:25 Read 57996 rows and found 15 numeric columns
09:22:25 Using Annoy for neighbor search, n_neighbors = 30
09:22:25 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
09:22:30 Writing NN index file to temp file /var/folders/ph/24prrxys02179y9_qzhxjgvc0000gn/T//RtmpH55A7T/file13f1572bdbb96
09:22:31 Searching Annoy index using 1 thread, search_k = 3000
09:22:48 Annoy recall = 100%
09:22:48 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
09:22:50 Initializing from normalized Laplacian + noise (using irlba)
09:22:52 Commencing optimization for 200 epochs, with 2501536 positive edges
Using method 'umap'
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
09:23:25 Optimization finished
cells_per_cluster <- data.frame (Cluster = as.numeric(second_timepoint$seurat_clusters), Condition = second_timepoint$OG_condition)
cells_per_cluster_list <- list()
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/original_condition_second_timepoint.pdf')
print(DimPlot(second_timepoint))
print(DimPlot(second_timepoint, label = T, group.by = 'OG_condition', cols = c('dabtramtodabtram' = '#561E59', 'dabtramtococl2' = '#A2248E', 'dabtramtocis' = '#9D85BE', 'cocl2todabtram' = '#10413B', 'cocl2tococl2' = '#6ABD45', 'cocl2tocis' = '#6DC49C', 'cistodabtram' = '#A23622', 'cistococl2' = '#F49129', 'cistocis' = '#FBD08C')))
for (i in 1:(max(cells_per_cluster$Cluster))){
currentcluster <- filter(cells_per_cluster, cells_per_cluster$Cluster == i)
cells_per_cluster_list[[paste0('Cluster', i)]] <- data.frame(table(currentcluster$Condition))
print(ggplot(cells_per_cluster_list[[paste0('Cluster', i)]], aes(x ='', y = Freq, fill = Var1)) + geom_bar(stat = 'identity') + coord_polar('y', start = 0) + theme_void() + scale_fill_manual(values = c('dabtramtodabtram' = '#561E59', 'dabtramtococl2' = '#A2248E', 'dabtramtocis' = '#9D85BE', 'cocl2todabtram' = '#10413B', 'cocl2tococl2' = '#6ABD45', 'cocl2tocis' = '#6DC49C', 'cistodabtram' = '#A23622', 'cistococl2' = '#F49129', 'cistocis' = '#FBD08C')) + labs(title = paste('Original condition of cells in cluster', i-1)))
}
Warning: stack imbalance in 'lapply', 83 then 81
dev.off()
null device
1
#editing cells_per_cluster so that the condition name is whatever second condition
#UMAPs highlighting all cells that had same first and second drug in second_timepoint object
cells_per_cluster_seconddrug <- cells_per_cluster
cells_per_cluster_firstdrug <- cells_per_cluster
for (i in 1:nrow(cells_per_cluster)){
if (cells_per_cluster$Condition[[i]] == "dabtramtodabtram" || cells_per_cluster$Condition[[i]] == "cistodabtram" || cells_per_cluster$Condition[[i]] == "cocl2todabtram") {
cells_per_cluster_seconddrug$Condition[[i]] <- "dabtramsecond"
}
if (cells_per_cluster$Condition[[i]] == "cocl2tococl2" || cells_per_cluster$Condition[[i]] == "cistococl2" || cells_per_cluster$Condition[[i]] == "dabtramtococl2") {
cells_per_cluster_seconddrug$Condition[[i]] <- "cocl2second"
}
if (cells_per_cluster$Condition[[i]] == "cistocis" || cells_per_cluster$Condition[[i]] == "cocl2tocis" || cells_per_cluster$Condition[[i]] == "dabtramtocis") {
cells_per_cluster_seconddrug$Condition[[i]] <- "cissecond"
}
}
for (i in 1:nrow(cells_per_cluster)){
if (cells_per_cluster$Condition[[i]] == "dabtramtodabtram" || cells_per_cluster$Condition[[i]] == "dabtramtocis" || cells_per_cluster$Condition[[i]] == "dabtramtococl2") {
cells_per_cluster_firstdrug$Condition[[i]] <- "dabtramfirst"
}
if (cells_per_cluster$Condition[[i]] == "cocl2tococl2" || cells_per_cluster$Condition[[i]] == "cocl2tocis" || cells_per_cluster$Condition[[i]] == "cocl2todabtram") {
cells_per_cluster_firstdrug$Condition[[i]] <- "cocl2first"
}
if (cells_per_cluster$Condition[[i]] == "cistocis" || cells_per_cluster$Condition[[i]] == "cistococl2" || cells_per_cluster$Condition[[i]] == "cistodabtram") {
cells_per_cluster_firstdrug$Condition[[i]] <- "cisfirst"
}
}
#now colored based on second drug only
for (i in 1:(max(cells_per_cluster_seconddrug$Cluster))){
pdf(paste0('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/end_condition_second_timepoint', i-1, '.pdf'))
currentcluster <- filter(cells_per_cluster_seconddrug, cells_per_cluster_seconddrug$Cluster == i)
cells_per_cluster_list[[paste0('Cluster', i)]] <- data.frame(table(currentcluster$Condition))
print(ggplot(cells_per_cluster_list[[paste0('Cluster', i)]], aes(x ='', y = Freq, fill = Var1)) + geom_bar(position = 'fill', stat = 'identity') + scale_fill_manual(values = c('dabtramsecond' = '#623594', 'cocl2second' = '#0F8241', 'cissecond' = '#C96D29')) + labs(title = paste('End condition of cells in second timepoint cluster', i-1)))
dev.off()
}
#now colored based on first drug only
for (i in 1:(max(cells_per_cluster_firstdrug$Cluster))){
pdf(paste0('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/first_condition_second_timepoint', i-1, '.pdf'))
currentcluster <- filter(cells_per_cluster_firstdrug, cells_per_cluster_firstdrug$Cluster == i)
cells_per_cluster_list[[paste0('Cluster', i)]] <- data.frame(table(currentcluster$Condition))
print(ggplot(cells_per_cluster_list[[paste0('Cluster', i)]], aes(x ='', y = Freq, fill = Var1)) + geom_bar(position = 'fill', stat = 'identity') + scale_fill_manual(values = c('dabtramfirst' = '#623594', 'cocl2first' = '#0F8241', 'cisfirst' = '#C96D29')) + labs(title = paste('End condition of cells in first timepoint cluster', i-1)))
dev.off()
}
#UMAPs highlighting all cells that had same first drug in first_timepoint object
cells_per_condition <- list()
for (i in c('dabtramtodabtram', 'dabtramtococl2', 'dabtramtocis', 'cocl2todabtram', 'cocl2tococl2', 'cocl2tocis', 'cistodabtram', 'cistococl2', 'cistocis')){
cells_per_condition[[paste0(i, '_cells')]] <- names(second_timepoint$orig.ident[second_timepoint$OG_condition == i])
}
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/drug_group_highlights.pdf')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$dabtramtodabtram_cells, cells_per_condition$dabtramtocis_cells, cells_per_condition$dabtramtococl2_cells),
cols.highlight = ('red')) + ggtitle('dabtram first cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cocl2tococl2_cells, cells_per_condition$cocl2tocis_cells, cells_per_condition$cocl2todabtram_cells),
cols.highlight = ('red')) + ggtitle('cocl2 first cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cis_cells, cells_per_condition$cistocis_cells, cells_per_condition$cistococl2_cells, cells_per_condition$cistodabtram_cells),
cols.highlight = ('red')) + ggtitle('cis first cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$dabtramtodabtram_cells, cells_per_condition$cistodabtram_cells, cells_per_condition$cocl2todabtram_cells),
cols.highlight = ('blue')) + ggtitle('dabtram second cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cocl2tococl2_cells, cells_per_condition$cistococl2_cells, cells_per_condition$dabtramtococl2_cells),
cols.highlight = ('blue')) + ggtitle('cocl2 second cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cis_cells, cells_per_condition$cistocis_cells, cells_per_condition$cocl2tocis_cells, cells_per_condition$dabtramtocis_cells),
cols.highlight = ('blue')) + ggtitle('cis second cells')
dev.off()
null device
1
#More plots not in PDF but just for quick visualizations
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/drug_group_highlights_first_timepoint.pdf')
DimPlot(first_timepoint, reduction = "umap", dims = c(1,2), group.by = "OG_condition", pt.size = 2, cols = c('dabtram' = '#623594', 'cocl2' = '#0F8241', 'cis' = '#C96D29'))
DimPlot(first_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(names(first_timepoint$orig.ident[first_timepoint$OG_condition == 'dabtram'])),
cols.highlight = ('red')) + ggtitle('dabtram first cells')
DimPlot(first_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(names(first_timepoint$orig.ident[first_timepoint$OG_condition == 'cocl2'])),
cols.highlight = ('red')) + ggtitle('cocl2 first cells')
DimPlot(first_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(names(first_timepoint$orig.ident[first_timepoint$OG_condition == 'cis'])),
cols.highlight = ('red')) + ggtitle('cis first cells')
FeaturePlot(first_timepoint, features = 'EGFR', pt.size = 2) +
scale_colour_gradientn(colors = rev(brewer.pal(n = 11, name = 'RdBu')))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
FeaturePlot(first_timepoint, features = 'NGFR', pt.size = 2) +
scale_colour_gradientn(colors = rev(brewer.pal(n = 11, name = 'RdBu')))
Scale for 'colour' is already present. Adding another scale for 'colour', which will replace the existing scale.
dev.off()
null device
1
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cocl2tococl2_cells, cells_per_condition$cocl2tocis_cells, cells_per_condition$cocl2todabtram_cells),
cols.highlight = c('#10413b', '#6dc49c', '#6abd45')) + ggtitle('cocl2 first cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cocl2tococl2_cells, cells_per_condition$cocl2tocis_cells, cells_per_condition$cocl2todabtram_cells),
cols.highlight = ('#0f8241')) + ggtitle('cocl2 first cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cocl2tococl2_cells, cells_per_condition$cistococl2_cells, cells_per_condition$dabtramtococl2_cells),
cols.highlight = c('#a2248e', '#f49129', '#6abd45')) + ggtitle('cocl2 second cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cocl2tococl2_cells, cells_per_condition$cistococl2_cells, cells_per_condition$dabtramtococl2_cells),
cols.highlight = ('#0f8241')) + ggtitle('cocl2 second cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cis_cells, cells_per_condition$cistocis_cells, cells_per_condition$cistococl2_cells, cells_per_condition$cistodabtram_cells),
cols.highlight = c('#a23622', '#f49129', '#fbd08c')) + ggtitle('cis first cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cis_cells, cells_per_condition$cistocis_cells, cells_per_condition$cistococl2_cells, cells_per_condition$cistodabtram_cells),
cols.highlight = ('#c96d29')) + ggtitle('cis first cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cis_cells, cells_per_condition$cistocis_cells, cells_per_condition$cocl2tocis_cells, cells_per_condition$dabtramtocis_cells),
cols.highlight = c('#9d85be', '#6dc49c', '#fbd08c')) + ggtitle('cis second cells')
DimPlot(second_timepoint, reduction = "umap", dims = c(1,2), group.by = 'OG_condition', pt.size = .1,
cells.highlight = list(cells_per_condition$cis_cells, cells_per_condition$cistocis_cells, cells_per_condition$cocl2tocis_cells, cells_per_condition$dabtramtocis_cells),
cols.highlight = ('#c96d29')) + ggtitle('cis second cells')
save.image('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/workspace.RData')
save.image('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/workspace.RData')
# Define manhatten distance function
manhattan.distance <- function(x, y) return(sum(abs(x-y)))
# Make metadata objects of the first and second conditions
second_timepoint$first_treatment <- sapply(strsplit(second_timepoint$OG_condition, "to"), "[[", 1)
second_timepoint$second_treatment <- sapply(strsplit(second_timepoint$OG_condition, "to"), "[[", 2)
# Get input data and subest
input_data <- GetAssayData(second_timepoint, assay = 'RNA', slot = 'scale.data')
# Number of cells for subsetting
num_cells <- 10000
# Look at grouping based on first sample
Idents(second_timepoint) <- second_timepoint$first_treatment
DimPlot(second_timepoint)
cocl2_first_cells <- sample(names(second_timepoint$first_treatment)[second_timepoint$first_treatment == 'cocl2'], num_cells)
cocl2_first_subset <- input_data[,cocl2_first_cells]
cocl2_first_manhatten_distance <- CustomDistance(cocl2_first_subset, manhattan.distance)
dabtram_first_cells <- sample(names(second_timepoint$first_treatment)[second_timepoint$first_treatment == 'dabtram'], num_cells)
dabtram_first_subset <- input_data[,dabtram_first_cells]
dabtram_first_manhatten_distance <- CustomDistance(dabtram_first_subset, manhattan.distance)
cis_first_cells <- sample(names(second_timepoint$first_treatment)[second_timepoint$first_treatment == 'cis'], num_cells)
cis_first_subset <- input_data[,cis_first_cells]
cis_first_manhatten_distance <- CustomDistance(cis_first_subset, manhattan.distance)
Idents(second_timepoint) <- second_timepoint$second_treatment
DimPlot(second_timepoint)
cocl2_second_cells <- sample(names(second_timepoint$second_treatment)[second_timepoint$second_treatment == 'cocl2'], num_cells)
cocl2_second_subset <- input_data[,cocl2_second_cells]
cocl2_second_manhatten_distance <- CustomDistance(cocl2_second_subset, manhattan.distance)
dabtram_second_cells <- sample(names(second_timepoint$second_treatment)[second_timepoint$second_treatment == 'dabtram'], num_cells)
dabtram_second_subset <- input_data[,dabtram_second_cells]
dabtram_second_manhatten_distance <- CustomDistance(dabtram_second_subset, manhattan.distance)
cis_second_cells <- sample(names(second_timepoint$second_treatment)[second_timepoint$second_treatment == 'cis'], num_cells)
cis_second_subset <- input_data[,cis_second_cells]
cis_second_manhatten_distance <- CustomDistance(cis_second_subset, manhattan.distance)
plotting_df <- data.frame(Grouping = c(rep('first',3*length(cis_first_manhatten_distance)),rep('second', 3*(length(cis_second_manhatten_distance)))),
Manhatten_dist <- c(cocl2_first_manhatten_distance, dabtram_first_manhatten_distance, cis_first_manhatten_distance, cocl2_second_manhatten_distance,dabtram_second_manhatten_distance, cis_second_manhatten_distance),
Treatment = c(rep('CoCl2 first',length(cocl2_first_manhatten_distance)),rep('Dab/Tram first',length(dabtram_first_manhatten_distance)),rep('Cisplatin first',length(cis_first_manhatten_distance)),rep('CoCl2 second',length(cocl2_second_manhatten_distance)),rep('Dab/Tram second',length(dabtram_second_manhatten_distance)),rep('Cisplatin second',length(cis_second_manhatten_distance))))
ggplot(plotting_df, aes(x = Treatment, y = Manhatten_dist, fill = Grouping)) + geom_violin() + geom_boxplot(width = 0.1)
save.image('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/workspace_v2.RData')
#load('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/workspace_v2.RData')
rm(manhatten.distance, cocl2_first_manhatten_distance, dabtram_first_manhatten_distance, cis_first_manhatten_distance, cocl2_second_manhatten_distance, dabtram_second_manhatten_distance, cis_second_manhatten_distance)
Warning in rm(manhatten.distance, cocl2_first_manhatten_distance, dabtram_first_manhatten_distance, :
object 'manhatten.distance' not found
rm(cis, cistocis, cistococl2, cistodabtram, cocl2, cocl2tocis, cocl2tococl2, cocl2todabtram, dabtram, dabtramtocis, dabtramtococl2, dabtramtodabtram)
gc()
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 4100917 219.1 6815804 364.1 NA 6815804 364.1
Vcells 1630180427 12437.3 7909077067 60341.5 102400 9886346333 75426.9
# Make metadata objects of the first and second conditions
second_timepoint$first_treatment <- sapply(strsplit(second_timepoint$OG_condition, "to"), "[[", 1)
second_timepoint$second_treatment <- sapply(strsplit(second_timepoint$OG_condition, "to"), "[[", 2)
# Get input data and subest
input_data <- GetAssayData(second_timepoint, assay = 'RNA', slot = 'scale.data')
# Look at grouping based on first sample
Idents(second_timepoint) <- second_timepoint$first_treatment
DimPlot(second_timepoint)
cocl2_first_cells <- names(second_timepoint$first_treatment)[second_timepoint$first_treatment == 'cocl2']
cocl2_first_subset <- input_data[,cocl2_first_cells]
cocl2_first_pearson <- cor(cocl2_first_subset)
cocl2_first_pearson_filt <- cocl2_first_pearson[lower.tri(cocl2_first_pearson, diag = FALSE)]
set.seed(1)
cocl2_first_subset_rand <- input_data[,sample(colnames(input_data),length(cocl2_first_cells))]
cocl2_first_pearson_rand <- cor(cocl2_first_subset_rand)
cocl2_first_pearson_filt_rand <- cocl2_first_pearson_rand[lower.tri(cocl2_first_pearson_rand, diag = FALSE)]
dabtram_first_cells <- names(second_timepoint$first_treatment)[second_timepoint$first_treatment == 'dabtram']
dabtram_first_subset <- input_data[,dabtram_first_cells]
dabtram_first_pearson <- cor(dabtram_first_subset)
dabtram_first_pearson_filt <- dabtram_first_pearson[lower.tri(dabtram_first_pearson, diag = FALSE)]
set.seed(1)
dabtram_first_subset_rand <- input_data[,sample(colnames(input_data),length(dabtram_first_cells))]
dabtram_first_pearson_rand <- cor(dabtram_first_subset_rand)
dabtram_first_pearson_filt_rand <- dabtram_first_pearson_rand[lower.tri(dabtram_first_pearson_rand, diag = FALSE)]
cis_first_cells <- names(second_timepoint$first_treatment)[second_timepoint$first_treatment == 'cis']
cis_first_subset <- input_data[,cis_first_cells]
cis_first_pearson <- cor(cis_first_subset)
cis_first_pearson_filt <- cis_first_pearson[lower.tri(cis_first_pearson, diag = FALSE)]
set.seed(1)
cis_first_subset_rand <- input_data[,sample(colnames(input_data),length(cis_first_cells))]
cis_first_pearson_rand <- cor(cis_first_subset_rand)
cis_first_pearson_filt_rand <- cis_first_pearson_rand[lower.tri(cis_first_pearson_rand, diag = FALSE)]
Idents(second_timepoint) <- second_timepoint$second_treatment
DimPlot(second_timepoint)
cocl2_second_cells <- names(second_timepoint$second_treatment)[second_timepoint$second_treatment == 'cocl2']
cocl2_second_subset <- input_data[,cocl2_second_cells]
cocl2_second_pearson <- cor(cocl2_second_subset)
cocl2_second_pearson_filt <- cocl2_second_pearson[lower.tri(cocl2_second_pearson, diag = FALSE)]
set.seed(1)
cocl2_second_subset_rand <- input_data[,sample(colnames(input_data),length(cocl2_second_cells))]
cocl2_second_pearson_rand <- cor(cocl2_second_subset_rand)
cocl2_second_pearson_filt_rand <- cocl2_second_pearson_rand[lower.tri(cocl2_second_pearson_rand, diag = FALSE)]
dabtram_second_cells <- names(second_timepoint$second_treatment)[second_timepoint$second_treatment == 'dabtram']
dabtram_second_subset <- input_data[,dabtram_second_cells]
dabtram_second_pearson <- cor(dabtram_second_subset)
dabtram_second_pearson_filt <- dabtram_second_pearson[lower.tri(dabtram_second_pearson, diag = FALSE)]
set.seed(1)
dabtram_second_subset_rand <- input_data[,sample(colnames(input_data),length(dabtram_second_cells))]
dabtram_second_pearson_rand <- cor(dabtram_second_subset_rand)
dabtram_second_pearson_filt_rand <- dabtram_second_pearson_rand[lower.tri(dabtram_second_pearson_rand, diag = FALSE)]
cis_second_cells <- names(second_timepoint$second_treatment)[second_timepoint$second_treatment == 'cis']
cis_second_subset <- input_data[,cis_second_cells]
cis_second_pearson <- cor(cis_second_subset)
cis_second_pearson_filt <- cis_second_pearson[lower.tri(cis_second_pearson, diag = FALSE)]
set.seed(1)
cis_second_subset_rand <- input_data[,sample(colnames(input_data),length(cis_second_cells))]
cis_second_pearson_rand <- cor(cis_second_subset_rand)
cis_second_pearson_filt_rand <- cis_second_pearson_rand[lower.tri(cis_second_pearson_rand, diag = FALSE)]
save.image('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/workspace_v3.RData')
# Save CoCl2 outputs
save(cocl2_first_pearson, cocl2_first_pearson_rand, cocl2_first_subset, cocl2_first_subset_rand, cocl2_first_pearson_filt, cocl2_first_pearson_filt_rand, cocl2_second_pearson, cocl2_second_pearson_rand, cocl2_second_subset, cocl2_second_subset_rand, cocl2_second_pearson_filt, cocl2_second_pearson_filt_rand, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cocl2_pearson_results.RData')
rm(cocl2_first_pearson, cocl2_first_pearson_rand, cocl2_first_subset, cocl2_first_subset_rand, cocl2_first_pearson_filt, cocl2_first_pearson_filt_rand, cocl2_second_pearson, cocl2_second_pearson_rand, cocl2_second_subset, cocl2_second_subset_rand, cocl2_second_pearson_filt, cocl2_second_pearson_filt_rand)
# Save dabtram outputs
save(dabtram_first_pearson, dabtram_first_pearson_rand, dabtram_first_subset, dabtram_first_subset_rand, dabtram_first_pearson_filt, dabtram_first_pearson_filt_rand, dabtram_second_pearson, dabtram_second_pearson_rand, dabtram_second_subset, dabtram_second_subset_rand, dabtram_second_pearson_filt, dabtram_second_pearson_filt_rand, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/dabtram_pearson_results.RData')
rm(dabtram_first_pearson, dabtram_first_pearson_rand, dabtram_first_subset, dabtram_first_subset_rand, dabtram_first_pearson_filt, dabtram_first_pearson_filt_rand, dabtram_second_pearson, dabtram_second_pearson_rand, dabtram_second_subset, dabtram_second_subset_rand, dabtram_second_pearson_filt, dabtram_second_pearson_filt_rand)
# Save cis outputs
save(cis_first_pearson, cis_first_pearson_rand, cis_first_subset, cis_first_subset_rand, cis_first_pearson_filt, cis_first_pearson_filt_rand, cis_second_pearson, cis_second_pearson_rand, cis_second_subset, cis_second_subset_rand, cis_second_pearson_filt, cis_second_pearson_filt_rand, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cis_pearson_results.RData')
rm(cis_first_pearson, cis_first_pearson_rand, cis_first_subset, cis_first_subset_rand, cis_first_pearson_filt, cis_first_pearson_filt_rand, cis_second_pearson, cis_second_pearson_rand, cis_second_subset, cis_second_subset_rand, cis_second_pearson_filt, cis_second_pearson_filt_rand)
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cocl2_pearson_results.RData') # Load data
pearson_ttest_cocl2_firstvsrand <- t.test(cocl2_first_pearson_filt, cocl2_first_pearson_filt_rand, paired = F)
pearson_ttest_cocl2_secondvsrand <- t.test(cocl2_second_pearson_filt, cocl2_second_pearson_filt_rand, paired = F)
pearson_ttest_cocl2_secondvsfirst <- t.test(cocl2_second_pearson_filt, cocl2_first_pearson_filt, paired = F)
pearson_ttest_cocl2_randvsrand <- t.test(cocl2_second_pearson_filt_rand, cocl2_first_pearson_filt_rand, paired = F)
rm(cocl2_first_pearson, cocl2_first_pearson_rand, cocl2_first_subset, cocl2_first_subset_rand, cocl2_second_pearson, cocl2_second_pearson_rand, cocl2_second_subset, cocl2_second_subset_rand)
plotting_df_pearson_cocl2 <- data.frame(Grouping = c(rep('first',length(cocl2_first_pearson_filt)), rep('random',length(cocl2_first_pearson_filt)), rep('second', length(cocl2_second_pearson_filt)), rep('random', length(cocl2_second_pearson_filt))),
Pearson = c(cocl2_first_pearson_filt, cocl2_first_pearson_filt_rand, cocl2_second_pearson_filt, cocl2_second_pearson_filt_rand),
Treatment = c(rep('CoCl2 first',length(cocl2_first_pearson_filt)), rep('CoCl2 first rand', length(cocl2_first_pearson_filt_rand)),rep('CoCl2 second',length(cocl2_second_pearson_filt)),rep('CoCl2 second rand',length(cocl2_second_pearson_filt_rand))))
colnames(plotting_df_pearson_cocl2) <- c('Grouping','Pearson','Treatment')
rm(cocl2_first_pearson_filt, cocl2_first_pearson_filt_rand, cocl2_second_pearson_filt, cocl2_second_pearson_filt_rand)
gc()
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 4102976 219.2 6815804 364.1 NA 6815804 364.1
Vcells 5659627351 43179.6 11389246976 86893.1 102400 11355547725 86636.0
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cocl2_pearson_plot_subsampled.pdf')
set.seed(1)
ggplot(plotting_df_pearson_cocl2[sample(1:nrow(plotting_df_pearson_cocl2), 10000000),], aes(x = Treatment, y = Pearson, fill = Grouping)) + geom_violin() + geom_boxplot(width = 0.1, outlier.shape = NA) +
scale_fill_manual(values=c("#FF0000", "#D3D3D3", "#0000FF"))
dev.off()
null device
1
# Save outputs
save(pearson_ttest_cocl2_firstvsrand, pearson_ttest_cocl2_secondvsfirst, pearson_ttest_cocl2_secondvsrand, pearson_ttest_cocl2_randvsrand, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cocl2_pearson_values.RData')
rm(pearson_ttest_cocl2_firstvsrand, pearson_ttest_cocl2_secondvsfirst, pearson_ttest_cocl2_secondvsrand, pearson_ttest_cocl2_randvsrand)
save(plotting_df_pearson_cocl2, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cocl2_pearson_plotting_df.RData')
rm(plotting_df_pearson_cocl2)
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/dabtram_pearson_results.RData') # Load data
pearson_ttest_dabtram_firstvsrand <- t.test(dabtram_first_pearson_filt, dabtram_first_pearson_filt_rand, paired = F)
pearson_ttest_dabtram_secondvsrand <- t.test(dabtram_second_pearson_filt, dabtram_second_pearson_filt_rand, paired = F)
pearson_ttest_dabtram_secondvsfirst <- t.test(dabtram_second_pearson_filt, dabtram_first_pearson_filt, paired = F)
pearson_ttest_dabtram_randvsrand <- t.test(dabtram_second_pearson_filt_rand, dabtram_first_pearson_filt_rand, paired = F)
rm(dabtram_first_pearson, dabtram_first_pearson_rand, dabtram_first_subset, dabtram_first_subset_rand, dabtram_second_pearson, dabtram_second_pearson_rand, dabtram_second_subset, dabtram_second_subset_rand)
plotting_df_pearson_dabtram <- data.frame(Grouping = c(rep('first',length(dabtram_first_pearson_filt)), rep('random',length(dabtram_first_pearson_filt)), rep('second', length(dabtram_second_pearson_filt)), rep('random', length(dabtram_second_pearson_filt))),
Pearson = c(dabtram_first_pearson_filt, dabtram_first_pearson_filt_rand, dabtram_second_pearson_filt, dabtram_second_pearson_filt_rand),
Treatment = c(rep('dabtram first',length(dabtram_first_pearson_filt)), rep('dabtram first rand', length(dabtram_first_pearson_filt_rand)),rep('dabtram second',length(dabtram_second_pearson_filt)),rep('dabtram second rand',length(dabtram_second_pearson_filt_rand))))
colnames(plotting_df_pearson_dabtram) <- c('Grouping','Pearson','Treatment')
rm(dabtram_first_pearson_filt, dabtram_first_pearson_filt_rand, dabtram_second_pearson_filt, dabtram_second_pearson_filt_rand)
gc()
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 4102712 219.2 6815804 364.1 NA 6815804 364.1
Vcells 3678842165 28067.4 11389246976 86893.1 102400 11361287181 86679.8
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/dabtram_pearson_plot_subsampled.pdf')
set.seed(1)
ggplot(plotting_df_pearson_dabtram[sample(1:nrow(plotting_df_pearson_dabtram), 10000000),], aes(x = Treatment, y = Pearson, fill = Grouping)) + geom_violin() + geom_boxplot(width = 0.1, outlier.shape = NA) +
scale_fill_manual(values=c("#FF0000", "#D3D3D3", "#0000FF"))
dev.off()
null device
1
# Save outputs
save(pearson_ttest_dabtram_firstvsrand, pearson_ttest_dabtram_secondvsfirst, pearson_ttest_dabtram_secondvsrand,pearson_ttest_dabtram_randvsrand, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/dabtram_pearson_values.RData')
rm(pearson_ttest_dabtram_firstvsrand, pearson_ttest_dabtram_secondvsfirst, pearson_ttest_dabtram_secondvsrand, pearson_ttest_dabtram_randvsrand)
save(plotting_df_pearson_dabtram, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/dabtram_pearson_plotting_df.RData')
rm(plotting_df_pearson_dabtram)
load('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cis_pearson_results.RData') # Load data
pearson_ttest_cis_firstvsrand <- t.test(cis_first_pearson_filt, cis_first_pearson_filt_rand, paired = F)
pearson_ttest_cis_secondvsrand <- t.test(cis_second_pearson_filt, cis_second_pearson_filt_rand, paired = F)
pearson_ttest_cis_secondvsfirst <- t.test(cis_second_pearson_filt, cis_first_pearson_filt, paired = F)
pearson_ttest_cis_randvsrand <- t.test(cis_second_pearson_filt_rand, cis_first_pearson_filt_rand, paired = F)
rm(cis_first_pearson, cis_first_pearson_rand, cis_first_subset, cis_first_subset_rand, cis_second_pearson, cis_second_pearson_rand, cis_second_subset, cis_second_subset_rand)
plotting_df_pearson_cis <- data.frame(Grouping = c(rep('first',length(cis_first_pearson_filt)), rep('random',length(cis_first_pearson_filt)), rep('second', length(cis_second_pearson_filt)), rep('random', length(cis_second_pearson_filt))),
Pearson = c(cis_first_pearson_filt, cis_first_pearson_filt_rand, cis_second_pearson_filt, cis_second_pearson_filt_rand),
Treatment = c(rep('cis first',length(cis_first_pearson_filt)), rep('cis first rand', length(cis_first_pearson_filt_rand)),rep('cis second',length(cis_second_pearson_filt)),rep('cis second rand',length(cis_second_pearson_filt_rand))))
colnames(plotting_df_pearson_cis) <- c('Grouping','Pearson','Treatment')
rm(cis_first_pearson_filt, cis_first_pearson_filt_rand, cis_second_pearson_filt, cis_second_pearson_filt_rand)
gc()
used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
Ncells 4102791 219.2 6815804 364.1 NA 6815804 364.1
Vcells 2804429888 21396.2 9111397581 69514.5 102400 11361287181 86679.8
pdf('2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cis_pearson_plot_subsampled.pdf')
set.seed(1)
ggplot(plotting_df_pearson_cis[sample(1:nrow(plotting_df_pearson_cis), 10000000),], aes(x = Treatment, y = Pearson, fill = Grouping)) + geom_violin() + geom_boxplot(width = 0.1, outlier.shape = NA) +
scale_fill_manual(values=c("#FF0000", "#D3D3D3", "#0000FF"))
dev.off()
null device
1
# Save outputs
save(pearson_ttest_cis_firstvsrand, pearson_ttest_cis_secondvsfirst, pearson_ttest_cis_secondvsrand, pearson_ttest_cis_randvsrand, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cis_pearson_values.RData')
rm(pearson_ttest_cis_firstvsrand, pearson_ttest_cis_secondvsfirst, pearson_ttest_cis_secondvsrand, pearson_ttest_cis_randvsrand)
save(plotting_df_pearson_cis, file = '2022_01_14_analysis_scripts/2022_05_27_analysis/Condition_Clustering/cis_pearson_plotting_df.RData')
rm(plotting_df_pearson_cis)